[Hadoop] Distributed File System HDFS (3)

Shell and Java API

Posted by 李玉坤 on 2017-06-05

HDFS shell

Using the common HDFS shell commands

Official documentation for reference:
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSCommands.html#dfs
hdfs dfs is equivalent to hadoop fs.
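For example, the following two commands are interchangeable and produce the same listing of the root directory:

hadoop fs -ls /
hdfs dfs -ls /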

hadoop fs ==> hdfs dfs
[-cat [-ignoreCrc] <src> ...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]

[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [<path> ...]]

[-put [-f] [-p] [-l] <localsrc> ... <dst>]
[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-copyFromLocal [-f] [-p] [-l] <localsrc> ... <dst>]
[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]

[-rm [-f] [-r|-R] [-skipTrash] <src> ...]

ls

[root@hadoop data]# hdfs dfs -ls /
18/11/09 21:40:49 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 1 items
drwxr-xr-x - root supergroup 0 2018-09-02 08:37 /hbase

get

[root@hadoop data]# hdfs dfs -get /hell.txt

mkdir

[root@hadoop data]# hdfs dfs -mkdir -p /text/a/b
18/11/09 21:44:57 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@hadoop data]# hdfs dfs -ls -R /text
18/11/09 21:46:25 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
drwxr-xr-x - root supergroup 0 2018-11-09 21:44 /text/a
drwxr-xr-x - root supergroup 0 2018-11-09 21:44 /text/a/b

rm

[root@hadoop data]# hdfs dfs -rm -R  /text/a/b

put

[root@hadoop data]# hdfs dfs -put hell.txt /
18/11/09 21:43:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@hadoop data]# hdfs dfs -ls /
18/11/09 21:43:25 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 2 items
drwxr-xr-x - root supergroup 0 2018-09-02 08:37 /hbase
-rw-r--r-- 1 root supergroup 13 2018-11-09 21:43 /hell.txt

Always check whether the trash (recycle bin) is enabled in the production environment; CDH enables it by default.

With the trash enabled, be careful with -skipTrash (it bypasses the trash and deletes immediately).

Not recommended: hdfs dfs -rm -skipTrash /test.log
Recommended:     hdfs dfs -rm -f /test.log

hdfs dfs -rm /test.log moves the file into the trash, where it is kept for 7 days and then deleted automatically.


fs.trash.interval = 10080 (minutes), i.e. 7 days
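To verify the setting and to inspect or restore trashed files, something like the following can be used; the /user/root/.Trash path assumes the delete was run as root and that /test.log was the deleted file:

hdfs getconf -confKey fs.trash.interval                      # prints the interval in minutes (10080 = 7 days)
hdfs dfs -ls /user/root/.Trash/Current                       # trashed files keep their original paths here
hdfs dfs -mv /user/root/.Trash/Current/test.log /test.log    # restore by moving the file back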

fsck

# File system check; "/" checks from the root directory
[root@hadoop ~]$ hdfs fsck /
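fsck can also report per-file block and location detail; the flags below are standard hdfs fsck options:

hdfs fsck / -files -blocks -locations    # per-file block IDs and the DataNodes holding them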

Java API operations

  • Create a Java project with IDEA + Maven

  • Add the HDFS-related dependencies

<properties>
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
</properties>

<repositories>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
  </repository>
</repositories>

<dependencies>
  <!-- Hadoop client dependency -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
  </dependency>

  <!-- JUnit dependency for unit tests -->
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.10</version>
    <scope>test</scope>
  </dependency>
</dependencies>
  • Use the Java API to operate on HDFS files
    Two things to note:
  1. Change hdfs://hadoop000:8020 to the host and port of your own NameNode (the fs.defaultFS value in $HADOOP_HOME/etc/hadoop/core-site.xml); a quick way to look it up is shown below.
  2. The user "hadoop" in FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop") must be changed to a user that has read/write permissions on the paths being accessed.
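A quick way to confirm the address for note 1 is to query the running configuration on the cluster:

hdfs getconf -confKey fs.defaultFS    # should print something like hdfs://hadoop000:8020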
package com.kun.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;

/**
 * Hadoop HDFS Java API operations
 */
public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://hadoop000:8020";

    FileSystem fileSystem = null;
    Configuration configuration = null;

    /**
     * Create an HDFS directory
     */
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }

    /**
     * Create a file and write to it
     */
    @Test
    public void create() throws Exception {
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
        output.write("hello hadoop".getBytes());
        output.flush();
        output.close();
    }

    /**
     * Print the contents of an HDFS file
     */
    @Test
    public void cat() throws Exception {
        FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/a.txt"));
        IOUtils.copyBytes(in, System.out, 1024);
        in.close();
    }

    /**
     * Rename a file
     */
    @Test
    public void rename() throws Exception {
        Path oldPath = new Path("/hdfsapi/test/a.txt");
        Path newPath = new Path("/hdfsapi/test/b.txt");
        fileSystem.rename(oldPath, newPath);
    }

    /**
     * Upload a local file to HDFS
     *
     * @throws Exception
     */
    @Test
    public void copyFromLocalFile() throws Exception {
        Path localPath = new Path("/Users/rocky/data/hello.txt");
        Path hdfsPath = new Path("/hdfsapi/test");
        fileSystem.copyFromLocalFile(localPath, hdfsPath);
    }

    /**
     * Upload a local file to HDFS with a progress callback
     */
    @Test
    public void copyFromLocalFileWithProgress() throws Exception {
        InputStream in = new BufferedInputStream(
                new FileInputStream(
                        new File("/Users/rocky/source/spark-1.6.1/spark-1.6.1-bin-2.6.0-cdh5.5.0.tgz")));

        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/spark-1.6.1.tgz"),
                new Progressable() {
                    public void progress() {
                        System.out.print("."); // progress indicator
                    }
                });

        IOUtils.copyBytes(in, output, 4096);
    }

    /**
     * Download an HDFS file to the local file system
     */
    @Test
    public void copyToLocalFile() throws Exception {
        Path localPath = new Path("/Users/rocky/tmp/h.txt");
        Path hdfsPath = new Path("/hdfsapi/test/hello.txt");
        fileSystem.copyToLocalFile(hdfsPath, localPath);
    }

    /**
     * List all entries under a directory
     */
    @Test
    public void listFiles() throws Exception {
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));

        for (FileStatus fileStatus : fileStatuses) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen();
            String path = fileStatus.getPath().toString();

            System.out.println(isDir + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Delete a path recursively (scoped to the test directory; passing "/" would wipe the entire file system)
     */
    @Test
    public void delete() throws Exception {
        fileSystem.delete(new Path("/hdfsapi/test"), true);
    }


    @Before
    public void setUp() throws Exception {
        System.out.println("HDFSApp.setUp");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop");
    }

    @After
    public void tearDown() throws Exception {
        configuration = null;
        fileSystem = null;

        System.out.println("HDFSApp.tearDown");
    }

}
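The tests above assume the standard Maven project layout and a NameNode reachable at HDFS_PATH. With a reasonably recent maven-surefire-plugin (2.7.3+), a single test method from the class can be run by name, for example:

mvn -Dtest=HDFSApp#mkdir test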